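/* 
Input files: fw_new_follows, CRSP_18_19, CRSP2020jan_jun, CRSP_07_18, mktdate_0719

Intermediate files: temp_nmonths (written and re-read within this do-file)

Output files: declarations, user_focus (T8), mktret10day (T9)

Requires: gtools (gunique, gcollapse, gduplicates)
*/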

**********************************************
* Start from an empty session and discard any leftover macros
clear all
macro drop _all

* Directory roots are kept in string scalars, each built from the drive letter
scalar drive    = "E:"
scalar maindir  = drive + "\Replicate"
scalar crspdir  = drive + "\Replicate"
scalar tabledir = drive + "\Replicate\TablesRep"

* Relative file names used below resolve against the main directory
cd "`=maindir'"
**********************************************

* Build the declarations file from the raw follow data (user-symbol-day level)
use fw_new_follows, clear
drop net*

* Put every variable name in lowercase with a single grouped rename
rename *, lower

* A system-missing bull/bear flag is recoded to 0
qui replace bull = 0 if bull == .
qui replace bear = 0 if bear == .

label variable date   "date of t0"
label variable userid "ID of focal user"
label variable bull   "Focal user declares bullish in t0"
label variable bear   "Focal user declares bearish in t0"

* Calendar month of the declaration date as a monthly (%tm) value
g int ym = mofd(date)
format ym %tm

* User-symbol-day level dataset with declarations (max 1 per user-symbol-day)
save declarations, replace


**************************************************************
**************************************************************


* Per-user activity density, saved to temp_nmonths for the focus step below.
*   factor = (number of distinct calendar months with a declaration)
*            / (months spanned from first to last declaration, inclusive)
* Only userid and the month index are needed. declarations already carries
* ym (monthly value built from date), so it is loaded rather than rebuilt,
* and the day-level counts that were never saved are no longer computed.
use userid ym using declarations, clear

* Distinct active months per user; gunique with by() leaves the count in _Unique
gunique ym, by(userid)
ren _Unique ymcount

* Collapse to one row per user: month count plus first and last active month
gen ymmin = ym
ren ym ymmax
gcollapse (first) ymcount (min) ymmin (max) ymmax, by(userid)

* Inclusive length of the user's active window, in months
gen nmonths = ymmax - ymmin + 1

* Share of months inside that window with at least one declaration
gen factor = ymcount / nmonths

keep userid factor
compress
save temp_nmonths, replace  // used below


**************************************************************
**************************************************************


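/* Make user-level variable: how focused is the user? Number of distinct symbols declared about per month, averaged over time per user, scaled by the share of months in which the user made a declaration (factor from temp_nmonths), then converted into above-percentile indicators - used in Table 8 (user characteristics) */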

* User-level focus indicators, saved to user_focus.
* Only the identifiers are needed; the bull/bear sums and the declaration
* counts were computed and then discarded, so they are no longer built.
use userid ym symbolid using declarations, clear

* Distinct symbols declared about per user-month (gunique leaves it in _Unique)
gunique symbolid, by(userid ym)
ren _Unique nuniq_decl

* One row per user-month
gcollapse (first) nuniq_decl, by(userid ym)
	label var nuniq_decl "N of symbols declared about/month by user"

* Average over the user's active months. egen stores a float, which is kept
* deliberately so the threshold comparisons below behave as before.
bys userid: egen focus= mean(nuniq_decl)
	label var focus "User's N of symbols declared/month, averaged over time"
keep userid focus
gduplicates drop // keep only 1 record/user

merge 1:1 userid using temp_nmonths, keep(match) nogen

* Scale by the share of active months: mean over active months times
* (active months / months spanned) gives symbols per month over the whole
* span, with inactive months counting as zero.
sum focus, d
replace focus=focus*factor
sum focus, d

* The dummies below rely on focus being non-missing, because a missing value
* compares as larger than any number and would be coded 1 everywhere.
* Enforce that instead of assuming it.
assert !missing(focus)

* NOTE(review): cutoffs are hard-coded, presumably read off the summarize
* output above; re-check them whenever the input data change.
gen byte focus_p50=cond(focus>1,1,0) // p50
gen byte focus_p75=cond(focus>2,1,0) // p75
gen byte focus_p90=cond(focus>3.7,1,0) // p90
gen byte focus_p95=cond(focus>5.67,1,0) // p95

gen focus_1orless=cond(focus<=1,1,0)
drop focus factor
save user_focus, replace

	
	
**************************************************************
**************************************************************


/* Prep S&P return to use for Table 9 */


* Forward compounded market returns by date, saved to mktret10day.
* CRSP files are addressed by explicit path, so the working directory never
* leaves the main directory, even if one of the loads fails.
use "`=crspdir'/CRSP_18_19", clear // already sorted by permno date
keep if date>td(31dec2018)
append using "`=crspdir'/CRSP2020jan_jun"
append using "`=crspdir'/CRSP_07_18"

* Keep the market-level series only and reduce to one row per date
keep date sprtrn vwretd
keep if date>td(01jan2013)
gduplicates drop

* mktdate is not among the CRSP variables kept above, so it comes from
* mktdate_0719; it is the time index used for the leads below.
* NOTE(review): the file name suggests 2007-2019 coverage while the returns
* run into 2020 - confirm that the 2020 dates receive a mktdate.
merge 1:1 date using mktdate_0719, nogen

tsset mktdate

* Compound the leads 2 through 10 of each series directly in double
* precision. Storing each lead in a default-float variable first would round
* the inputs before they are multiplied.
* NOTE(review): leads 2-10 cover nine periods although the variable is named
* 10d; confirm that skipping lead 1 is intended.
foreach variable in sprtrn vwretd {
	local gross "(1+f10.`variable')"
	forval k = 9(-1)2 {
		local gross "`gross'*(1+f`k'.`variable')"
	}
	gen double `variable'_10d_f2 = `gross' - 1
}

keep date sprtrn_10d_f2 vwretd_10d_f2
save mktret10day, replace


